import scrapy
import re
from doubanfile.items import DoubanfileItem
class DbSpider(scrapy.Spider):
    """Spider that scrapes the movie rows of the Douban chart page.

    Starts at ``https://movie.douban.com/chart`` and yields one
    ``DoubanfileItem`` per ``<tr class="item">`` table row, filling the
    ``img_src``, ``name`` and ``to_star`` fields.
    """

    name = "db"
    allowed_domains = ["movie.douban.com"]
    start_urls = ["https://movie.douban.com/chart"]

    # Characters removed from a title: "/" separators and any whitespace.
    # Raw string so that "\s" reaches the regex engine instead of being
    # treated as an (invalid) string escape.
    _TITLE_NOISE = re.compile(r"[/\s]")

    def parse(self, response):
        """Extract one item per movie row from the chart page.

        Args:
            response: The downloaded chart page; only its ``xpath`` method
                is used here.

        Yields:
            A fresh ``DoubanfileItem`` for each row. A field is ``None``
            when the corresponding node is absent from the row.
        """
        tr_list = response.xpath("//div[@class='indent']/div/table/tr[@class='item']")
        for tr in tr_list:
            # Cover image URL.
            img_url = tr.xpath('./td[1]/a/img/@src').extract_first()

            # Movie title: first text node of the title link, with slashes
            # and whitespace stripped. extract_first() returns None when the
            # node is missing, so guard before handing it to the regex.
            raw_name = tr.xpath('./td[2]/div/a/text()').extract_first()
            if raw_name is None:
                movie_name = None
            else:
                movie_name = self._TITLE_NOISE.sub('', raw_name)

            # First text node of the row's <p>; the original author labelled
            # this as the lead cast -- NOTE(review): confirm against the page.
            to_star = tr.xpath('./td[2]/div/p/text()').extract_first()

            # Build a new item per row. Re-yielding one shared instance would
            # let later rows overwrite data that downstream pipelines still
            # hold a reference to.
            item = DoubanfileItem()
            item['img_src'] = img_url
            item['name'] = movie_name
            item['to_star'] = to_star
            yield item
